/*
* Copyright (C) 2010-2011 Mamadou Diop.
*
* Contact: Mamadou Diop <diopmamadou(at)doubango.org>
*	
* This file is part of Open Source Doubango Framework.
*
* DOUBANGO is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*	
* DOUBANGO is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*	
* You should have received a copy of the GNU General Public License
* along with DOUBANGO.
*
*/
/*
 * Copyright (c) Microsoft Corporation. All rights reserved.
 */
#include <tinydshow/Resizer.h>

#include <string.h>

/*
* ResizeRGB
*
* Scale a complete source bitmap into a destination bitmap by handing
* both to StretchDIB with the two origins at (0,0).
*
* pbiIn      - header of the source bitmap; its biWidth and biHeight are
*              passed on as the source extents
* dibBits    - source bits
* pbiOut     - header of the destination bitmap
* pFrame     - destination bits
* iNewWidth  - destination x extent in pixels
* iNewHeight - destination y extent in pixels
*/
void ResizeRGB( BITMAPINFOHEADER *pbiIn,    // source BITMAPINFOHEADER
			   const unsigned char * dibBits,    // source bits
			   BITMAPINFOHEADER *pbiOut,    // destination BITMAPINFOHEADER
			   unsigned char *pFrame,    // destination bits
			   int iNewWidth,            // new width in pixels
			   int iNewHeight)           // new height in pixels
{
	/* the whole source bitmap is always used as the source rectangle */
	const int srcExtentX = pbiIn->biWidth;
	const int srcExtentY = pbiIn->biHeight;

	/* StretchDIB takes a non-const pointer for the source bits */
	void *srcBits = (void*) dibBits;

	StretchDIB(pbiOut, pFrame, 0, 0, iNewWidth, iNewHeight,
		pbiIn, srcBits, 0, 0, srcExtentX, srcExtentY);
}


/* -------------------------------------------------------------------- */

/*
* StretchFactor
*
* Classify the relationship between a source extent and a destination
* extent as one of 1:1, 1:2, 1:4, 1:N (destination larger) or 2:1, 4:1,
* N:1 (source larger).
*
* When pfract is not NULL the delta fraction is also stored through it:
* the smaller extent divided by the larger one, as a 16-bit fixed-point
* fraction (multiplied by 65536 and masked to 16 bits). It is 0 for 1:1.
*
* SrcE   - source extent
* DstE   - destination extent
* pfract - optional; receives the delta fraction
*
* Both extents are expected to be positive; StretchDIB rejects other
* values before calling this function.
*
* returns: the stretch factor (one of the STRETCH_x_y constants)
*/

int
StretchFactor(int SrcE, int DstE, int *pfract)
{
	if (SrcE == DstE) {
		if (pfract != NULL) {
			/* store through the pointer; assigning to the pointer itself
			* would leave the caller's variable uninitialized */
			*pfract = 0;
		}

		return(STRETCH_1_1);
	}

	if (SrcE > DstE) {
		/* shrinking: the fraction is destination / source. Computed in
		* 64 bits so that extents of 32768 or more cannot overflow an int */
		if (pfract != NULL) {
			*pfract = (int) ((((long long) DstE * 65536) / SrcE) & 0xffff);
		}

		if (SrcE == (DstE * 2)) {
			return(STRETCH_2_1);
		} else if (SrcE == (DstE * 4)) {
			return(STRETCH_4_1);
		} else {
			return(STRETCH_N_1);
		}

	} else {

		/* stretching: the fraction is source / destination */
		if (pfract != NULL) {
			*pfract = (int) ((((long long) SrcE * 65536) / DstE) & 0xffff);
		}

		if (DstE == (SrcE * 2)) {
			return(STRETCH_1_2);
		} else if (DstE == (SrcE * 4)) {
			return(STRETCH_1_4);
		} else {
			return(STRETCH_1_N);
		}
	}
}


/* -------------------------------------------------------------------- */

/*
* StretchDIB
*
*/

void FAR PASCAL
StretchDIB(
		   LPBITMAPINFOHEADER biDst,   //	--> BITMAPINFO of destination
		   LPVOID	lpvDst,		    //	--> to destination bits
		   int	DstX,		    //	Destination origin - x coordinate
		   int	DstY,		    //	Destination origin - y coordinate
		   int	DstXE,		    //	x extent of the BLT
		   int	DstYE,		    //	y extent of the BLT
		   LPBITMAPINFOHEADER biSrc,   //	--> BITMAPINFO of source
		   LPVOID	lpvSrc,		    //	--> to source bits
		   int	SrcX,		    //	Source origin - x coordinate
		   int	SrcY,		    //	Source origin - y coordinate
		   int	SrcXE,		    //	x extent of the BLT
		   int	SrcYE	 	    //	y extent of the BLT
		   )
{

	int nBits;
	int SrcWidth, DstWidth;
	LPBYTE lpDst = (LPBYTE)lpvDst, lpSrc = (LPBYTE)lpvSrc;
	int x_fract;
	int x_factor;
	int y_factor;
	X_FUNC xfunc;


	/*
	* chek that sizes are not same
	*/
	/*if(DstXE == SrcXE && DstYE == SrcYE)
	{
	return;
	}*/
	/*
	* check that bit depths are same and 8, 16 or 24
	*/

	if ((nBits = biDst->biBitCount) != biSrc->biBitCount) {
		return;
	}

	if ( (nBits != 8 ) && (nBits != 16) && (nBits != 24) &&
		(nBits != 32)) {
			return;
	}

	/*
	* check that extents are not bad
	*/
	if ( (SrcXE <= 0) || (SrcYE <= 0) || (DstXE <= 0) || (DstYE <= 0)) {
		return;
	}

	/*
	* calculate width of one scan line in bytes, rounded up to
	* DWORD boundary.
	*/
	SrcWidth = (((biSrc->biWidth * nBits) + 31) & ~31) / 8;
	DstWidth = (((biDst->biWidth * nBits) + 31) & ~31) / 8;

	/*
	* set initial source and dest pointers
	*/
	lpSrc += (SrcY * SrcWidth) + ((SrcX * nBits) / 8);
	lpDst += (DstY * DstWidth) + ((DstX * nBits) / 8);


	/*
	* calculate stretch proportions (1:1, 1:2, 1:N, N:1 etc) and
	* also the fractional stretch factor. (we are not interested in
	* the y stretch fraction - this is only used in x stretching.
	*/

	y_factor = StretchFactor(SrcYE, DstYE, NULL);
	x_factor = StretchFactor(SrcXE, DstXE, &x_fract);

	/*
	* we have special case routines for 1:2 in both dimensions
	* for 8 and 16 bits
	*/
	if ((y_factor == x_factor) && (y_factor == STRETCH_1_2)) {

		if (nBits == 8) {
			//StartCounting();
			Stretch_1_2_8Bits(lpSrc, lpDst, SrcXE, SrcYE,
				DstXE, DstYE, SrcWidth, DstWidth,
				x_fract);
			//EndCounting("8 bit");
			return;

		} else if (nBits == 16) {
			//StartCounting();
			Stretch_1_2_16Bits(lpSrc, lpDst, SrcXE, SrcYE,
				DstXE, DstYE, SrcWidth, DstWidth,
				x_fract);
			//EndCounting("16 bit");
			return;
		}
	}


	/* pick an X stretch function */
	switch(nBits) {

	case 8:
		switch(x_factor) {
	case STRETCH_1_1:
		xfunc = X_Stretch_1_1_8Bits;
		break;

	case STRETCH_1_2:
		xfunc = X_Stretch_1_2_8Bits;
		break;

	case STRETCH_1_4:
		xfunc = X_Stretch_1_4_8Bits;
		break;

	case STRETCH_1_N:
		xfunc = X_Stretch_1_N_8Bits;
		break;

	case STRETCH_N_1:
	case STRETCH_4_1:
	case STRETCH_2_1:
		xfunc = X_Stretch_N_1_8Bits;
		break;

		}
		break;

	case 16:
		switch(x_factor) {
	case STRETCH_1_1:
		xfunc = X_Stretch_1_1_16Bits;
		break;

	case STRETCH_1_2:
		xfunc = X_Stretch_1_2_16Bits;
		break;

	case STRETCH_1_4:
	case STRETCH_1_N:
		xfunc = X_Stretch_1_N_16Bits;
		break;

	case STRETCH_N_1:
	case STRETCH_4_1:
	case STRETCH_2_1:
		xfunc = X_Stretch_N_1_16Bits;
		break;

		}
		break;

	case 24:
		switch(x_factor) {
	case STRETCH_1_1:
		xfunc = X_Stretch_1_1_24Bits;
		break;

	case STRETCH_1_2:
	case STRETCH_1_4:
	case STRETCH_1_N:
		xfunc = X_Stretch_1_N_24Bits;
		break;

	case STRETCH_N_1:
	case STRETCH_4_1:
	case STRETCH_2_1:
		xfunc = X_Stretch_N_1_24Bits;
		break;

		}
		break;

	case 32:
		switch(x_factor) {
	case STRETCH_1_1:
		xfunc = X_Stretch_1_1_32Bits;
		break;

	case STRETCH_1_2:
	case STRETCH_1_4:
	case STRETCH_1_N:
		xfunc = X_Stretch_1_N_32Bits;
		break;

	case STRETCH_N_1:
	case STRETCH_4_1:
	case STRETCH_2_1:
		xfunc = X_Stretch_N_1_32Bits;
		break;

		}
		break;

	}


	/*
	* now call appropriate stretching function depending
	* on the y stretch factor
	*/
	switch (y_factor) {
	case STRETCH_1_1:
	case STRETCH_1_2:
	case STRETCH_1_4:
	case STRETCH_1_N:
		Y_Stretch_1_N(lpSrc, lpDst, SrcXE, SrcYE,
			DstXE, DstYE, SrcWidth, DstWidth, x_fract, xfunc, nBits);
		break;

	case STRETCH_N_1:
	case STRETCH_4_1:
	case STRETCH_2_1:
		Y_Stretch_N_1(lpSrc, lpDst, SrcXE, SrcYE,
			DstXE, DstYE, SrcWidth, DstWidth, x_fract, xfunc);
		break;

	}
	return;
}


/* ---- y stretching -------------------------------------------- */

/*
* call an X_FUNC to copy scanlines from lpSrc to lpDst. Duplicate or
* omit scanlines to stretch SrcYE to DstYE.
*/


/*
* Y_Stretch_1_N
*
* Produce DstYE destination scanlines from SrcYE source scanlines for the
* case where the destination is at least as tall as the source.
*
* A source line is run through x_func once. While the same source line
* is still current, further destination lines are filled by copying the
* line that was already produced (DstXE * nBits / 8 bytes) instead of
* stretching again.
*/

void
Y_Stretch_1_N(LPBYTE lpSrc,
			  LPBYTE lpDst,
			  int SrcXE,
			  int SrcYE,
			  int DstXE,
			  int DstYE,
			  int SrcWidth,
			  int DstWidth,
			  int x_fract,
			  X_FUNC x_func,
			  int nBits)
{
	LPBYTE lpStretched = NULL;	/* output of the current source line, if any */
	int yerr = DstYE - 1;		/* running error term */
	int row = 0;

	while (row < DstYE) {

		if (lpStretched != NULL) {
			/* same source line as before - reuse its stretched output */
			X_CopyScanline(lpStretched, lpDst, DstXE * nBits / 8);
		} else {
			/* first use of this source line - stretch it */
			x_func(lpSrc, lpDst, SrcXE, DstXE, x_fract);
			lpStretched = lpDst;
		}

		/* next destination line */
		lpDst += DstWidth;

		/* step to the next source line once the error term runs out */
		yerr -= SrcYE;
		if (yerr < 0) {
			yerr += DstYE;
			lpSrc += SrcWidth;
			lpStretched = NULL;
		}

		row++;
	}
}


/*
* Y_Stretch_N_1
*
* Produce DstYE destination scanlines from SrcYE source scanlines for the
* case where the destination is shorter than the source. Every
* destination line is produced by x_func from the current source line;
* one or more source lines are then skipped.
*/
void
Y_Stretch_N_1(LPBYTE lpSrc,
			  LPBYTE lpDst,
			  int SrcXE,
			  int SrcYE,
			  int DstXE,
			  int DstYE,
			  int SrcWidth,
			  int DstWidth,
			  int x_fract,
			  X_FUNC x_func)
{
	int yerr = SrcYE - 1;	/* running error term */
	int rowsLeft;

	for (rowsLeft = DstYE; rowsLeft > 0; rowsLeft--) {

		/* emit one destination line from the current source line */
		x_func(lpSrc, lpDst, SrcXE, DstXE, x_fract);
		lpDst += DstWidth;

		/* always move on by one source line ... */
		lpSrc += SrcWidth;
		yerr -= DstYE;

		/* ... and keep skipping while the error term allows it */
		while (yerr >= 0) {
			lpSrc += SrcWidth;
			yerr -= DstYE;
		}

		yerr += SrcYE;
	}
}

/* ---8-bit X stretching -------------------------------------------------- */

/*
* X_Stretch_1_N_8Bits
*
* Write DstXE one-byte pixels from a scanline of SrcXE pixels, repeating
* source pixels as required (DstXE > SrcXE). x_fract is not used.
*/
void
X_Stretch_1_N_8Bits(LPBYTE lpSrc,
					LPBYTE lpDst,
					int SrcXE,
					int DstXE,
					int x_fract)
{
	int xerr = DstXE - 1;	/* running error term */
	int col;

	for (col = 0; col < DstXE; col++) {

		/* emit the current source pixel */
		lpDst[col] = *lpSrc;

		/* step to the next source pixel once the error term runs out */
		xerr -= SrcXE;
		if (xerr < 0) {
			xerr += DstXE;
			lpSrc++;
		}
	}
}


/*
* X_Stretch_N_1_8Bits
*
* Write DstXE one-byte pixels from a scanline of SrcXE pixels, dropping
* source pixels as required (DstXE < SrcXE). x_fract is not used.
*/
void
X_Stretch_N_1_8Bits(LPBYTE lpSrc,
					LPBYTE lpDst,
					int SrcXE,
					int DstXE,
					int x_fract)
{
	int xerr = SrcXE - 1;	/* running error term */
	int col;

	for (col = 0; col < DstXE; col++) {

		/* emit the current source pixel */
		lpDst[col] = *lpSrc;

		/* always move on by one source pixel ... */
		lpSrc++;
		xerr -= DstXE;

		/* ... and keep skipping while the error term allows it */
		while (xerr >= 0) {
			lpSrc++;
			xerr -= DstXE;
		}

		xerr += SrcXE;
	}
}

/*
* X_CopyScanline
*
* Copy count bytes from lpSrc to lpDst. Used by the 1:1 scanline
* functions for all bit depths and for duplicating already stretched
* scanlines.
*
* lpSrc - first byte to read
* lpDst - first byte to write
* count - number of bytes to copy; nothing is copied when it is zero or
*         negative
*
* Exactly count bytes are written, whatever the alignment of the two
* pointers. The previous hand-rolled version aligned the pointers by
* copying one byte and/or one WORD before checking that count was that
* large, so short copies could write past the requested length.
* memmove is used so that overlapping buffers are also handled.
*/
void
X_CopyScanline(LPBYTE lpSrc, LPBYTE lpDst, int count)
{
	if (count <= 0) {
		return;
	}

	memmove(lpDst, lpSrc, (size_t) count);
}

/*
* X_Stretch_1_1_8Bits
*
* Copy an 8-bit scanline unchanged (1:1): DstXE bytes are copied from
* lpSrc to lpDst.
*/
void
X_Stretch_1_1_8Bits(LPBYTE lpSrc,
					LPBYTE lpDst,
					int SrcXE,
					int DstXE,
					int x_fract)
{
	/* only the destination extent is needed for a straight copy */
	(void) SrcXE;
	(void) x_fract;

	X_CopyScanline(lpSrc, lpDst, DstXE);
}


/*
* X_Stretch_1_2_8Bits
*
* Copy an 8-bit scanline, writing every source pixel twice (1:2).
* SrcXE source bytes are read and twice as many bytes are written;
* DstXE and x_fract are not used.
*/
void
X_Stretch_1_2_8Bits(LPBYTE lpSrc,
					LPBYTE lpDst,
					int SrcXE,
					int DstXE,
					int x_fract)
{
	unsigned char pix;
	int remaining;

	for (remaining = SrcXE; remaining > 0; remaining--) {

		/* fetch one pixel and store it in two adjacent bytes */
		pix = *lpSrc++;
		lpDst[0] = pix;
		lpDst[1] = pix;
		lpDst += 2;
	}
}


/*
* X_Stretch_1_4_8Bits
*
* Copy an 8-bit scanline, writing every source pixel four times (1:4).
* SrcXE source bytes are read and four times as many bytes are written;
* DstXE and x_fract are not used.
*/
void
X_Stretch_1_4_8Bits(LPBYTE lpSrc,
					LPBYTE lpDst,
					int SrcXE,
					int DstXE,
					int x_fract)
{
	unsigned char pix;
	int remaining;

	for (remaining = SrcXE; remaining > 0; remaining--) {

		/* fetch one pixel and store it in four adjacent bytes */
		pix = *lpSrc++;
		lpDst[0] = pix;
		lpDst[1] = pix;
		lpDst[2] = pix;
		lpDst[3] = pix;
		lpDst += 4;
	}
}


/*  -- 16-bit X functions -----------------------------------------------*/

/*
* X_Stretch_1_1_16Bits
*
* Copy a 16-bit scanline unchanged (1:1): DstXE pixels of sizeof(WORD)
* bytes each are copied from lpSrc to lpDst.
*/
void
X_Stretch_1_1_16Bits(LPBYTE lpSrc,
					 LPBYTE lpDst,
					 int SrcXE,
					 int DstXE,
					 int x_fract)
{
	/* only the destination extent is needed for a straight copy */
	(void) SrcXE;
	(void) x_fract;

	X_CopyScanline(lpSrc, lpDst, DstXE * sizeof(WORD));
}


/*
* X_Stretch_1_2_16Bits
*
* Copy a 16-bit scanline, writing every source pixel twice (1:2).
* SrcXE source pixels are read; DstXE and x_fract are not used.
*/
void
X_Stretch_1_2_16Bits(LPBYTE lpSrc,
					 LPBYTE lpDst,
					 int SrcXE,
					 int DstXE,
					 int x_fract)
{
	WORD pix;
	int remaining;

	for (remaining = SrcXE; remaining > 0; remaining--) {

		/* fetch one pixel */
		pix = *((WORD *) lpSrc);
		lpSrc += sizeof(WORD);

		/* store it in two adjacent destination pixels */
		((WORD UNALIGNED *) lpDst)[0] = pix;
		((WORD UNALIGNED *) lpDst)[1] = pix;
		lpDst += 2 * sizeof(WORD);
	}
}

/*
* X_Stretch_1_N_16Bits
*
* Write DstXE 16-bit pixels from a scanline of SrcXE pixels, repeating
* source pixels as required (dest > source). x_fract is not used.
*/
void
X_Stretch_1_N_16Bits(LPBYTE lpSrc,
					 LPBYTE lpDst,
					 int SrcXE,
					 int DstXE,
					 int x_fract)
{
	WORD *srcPix = (WORD *) lpSrc;
	WORD *dstPix = (WORD *) lpDst;
	int xerr = DstXE - 1;	/* running error term */
	int col;

	for (col = 0; col < DstXE; col++) {

		/* emit the current source pixel */
		dstPix[col] = *srcPix;

		/* step to the next source pixel once the error term runs out */
		xerr -= SrcXE;
		if (xerr < 0) {
			xerr += DstXE;
			srcPix++;
		}
	}
}

/*
* X_Stretch_N_1_16Bits
*
* Write DstXE 16-bit pixels from a scanline of SrcXE pixels, dropping
* source pixels as required (dest < source). x_fract is not used.
*/
void
X_Stretch_N_1_16Bits(LPBYTE lpSrc,
					 LPBYTE lpDst,
					 int SrcXE,
					 int DstXE,
					 int x_fract)
{
	WORD *srcPix = (WORD *) lpSrc;
	WORD *dstPix = (WORD *) lpDst;
	int xerr = SrcXE - 1;	/* running error term */
	int col;

	for (col = 0; col < DstXE; col++) {

		/* emit the current source pixel */
		dstPix[col] = *srcPix;

		/* always move on by one source pixel ... */
		srcPix++;
		xerr -= DstXE;

		/* ... and keep skipping while the error term allows it */
		while (xerr >= 0) {
			srcPix++;
			xerr -= DstXE;
		}

		xerr += SrcXE;
	}
}


/* 24-bits ---------------------------------------------------------*/

/*
* X_Stretch_1_1_24Bits
*
* Copy a 24-bpp scanline unchanged (1:1): DstXE pixels of 3 bytes each
* are copied from lpSrc to lpDst.
*/
void
X_Stretch_1_1_24Bits(LPBYTE lpSrc,
					 LPBYTE lpDst,
					 int SrcXE,
					 int DstXE,
					 int x_fract)
{
	/* only the destination extent is needed for a straight copy */
	(void) SrcXE;
	(void) x_fract;

	X_CopyScanline(lpSrc, lpDst, DstXE * 3);
}

/*
* X_Stretch_1_N_24Bits
*
* Write DstXE 3-byte pixels from a scanline of SrcXE pixels, repeating
* source pixels as required (dest > source). x_fract is not used.
*/
void
X_Stretch_1_N_24Bits(LPBYTE lpSrc,
					 LPBYTE lpDst,
					 int SrcXE,
					 int DstXE,
					 int x_fract)
{
	unsigned char b0, b1;
	int xerr = DstXE - 1;	/* running error term */
	int remaining;

	for (remaining = DstXE; remaining > 0; remaining--) {

		/* first two bytes of the pixel */
		b0 = lpSrc[0];
		b1 = lpSrc[1];
		lpDst[0] = b0;
		lpDst[1] = b1;

		/* third byte of the pixel */
		lpDst[2] = lpSrc[2];
		lpDst += 3;

		/* step to the next source pixel once the error term runs out */
		xerr -= SrcXE;
		if (xerr < 0) {
			xerr += DstXE;
			lpSrc += 3;
		}
	}
}

/*
* X_Stretch_N_1_24Bits
*
* Write DstXE 3-byte pixels from a scanline of SrcXE pixels, dropping
* source pixels as required (dest < source). x_fract is not used.
*/
void
X_Stretch_N_1_24Bits(LPBYTE lpSrc,
					 LPBYTE lpDst,
					 int SrcXE,
					 int DstXE,
					 int x_fract)
{
	unsigned char b0, b1;
	int xerr = SrcXE - 1;	/* running error term */
	int remaining;

	for (remaining = DstXE; remaining > 0; remaining--) {

		/* first two bytes of the pixel */
		b0 = lpSrc[0];
		b1 = lpSrc[1];
		lpDst[0] = b0;
		lpDst[1] = b1;

		/* third byte of the pixel */
		lpDst[2] = lpSrc[2];
		lpDst += 3;

		/* always move on by one source pixel ... */
		lpSrc += 3;
		xerr -= DstXE;

		/* ... and keep skipping while the error term allows it */
		while (xerr >= 0) {
			lpSrc += 3;
			xerr -= DstXE;
		}

		xerr += SrcXE;
	}
}


/* 32-bits ---------------------------------------------------------*/

/*
* X_Stretch_1_1_32Bits
*
* Copy a 32-bpp scanline unchanged (1:1): DstXE pixels of
* sizeof(RGBQUAD) bytes each are copied from lpSrc to lpDst.
*/
void
X_Stretch_1_1_32Bits(LPBYTE lpSrc,
					 LPBYTE lpDst,
					 int SrcXE,
					 int DstXE,
					 int x_fract)
{
	/* only the destination extent is needed for a straight copy */
	(void) SrcXE;
	(void) x_fract;

	X_CopyScanline((BYTE*) lpSrc, (BYTE*) lpDst, DstXE * sizeof( RGBQUAD ) );
}

/*
* X_Stretch_1_N_32Bits
*
* Write DstXE RGBQUAD pixels from a scanline of SrcXE pixels, repeating
* source pixels as required (dest > source). x_fract is not used.
*/
void
X_Stretch_1_N_32Bits(LPBYTE lpSrc0,
					 LPBYTE lpDst0,
					 int SrcXE,
					 int DstXE,
					 int x_fract)
{
	RGBQUAD *srcPix = (RGBQUAD *) lpSrc0;
	RGBQUAD *dstPix = (RGBQUAD *) lpDst0;
	int xerr = DstXE - 1;	/* running error term */
	int col;

	for (col = 0; col < DstXE; col++) {

		/* emit the current source pixel */
		dstPix[col] = *srcPix;

		/* step to the next source pixel once the error term runs out */
		xerr -= SrcXE;
		if (xerr < 0) {
			xerr += DstXE;
			srcPix++;
		}
	}
}

/*
* X_Stretch_N_1_32Bits
*
* Write DstXE RGBQUAD pixels from a scanline of SrcXE pixels, dropping
* source pixels as required (dest < source). x_fract is not used.
*/
void
X_Stretch_N_1_32Bits(LPBYTE lpSrc0,
					 LPBYTE lpDst0,
					 int SrcXE,
					 int DstXE,
					 int x_fract)
{
	RGBQUAD *srcPix = (RGBQUAD *) lpSrc0;
	RGBQUAD *dstPix = (RGBQUAD *) lpDst0;
	int xerr = SrcXE - 1;	/* running error term */
	int col;

	for (col = 0; col < DstXE; col++) {

		/* emit the current source pixel */
		dstPix[col] = *srcPix;

		/* always move on by one source pixel ... */
		srcPix++;
		xerr -= DstXE;

		/* ... and keep skipping while the error term allows it */
		while (xerr >= 0) {
			srcPix++;
			xerr -= DstXE;
		}

		xerr += SrcXE;
	}
}




/* -- special-case 1:2 -------------------------------------------*/

/*
* Stretch_1_2_8Bits
*
* stretch 1:2 in both directions, for 8 bits: every source pixel is
* written as a 2x2 block, i.e. twice on the current destination scanline
* and twice on the scanline DstWidth bytes further on.
*
* lpSrc, lpDst       - first source / destination byte of the rectangle
* SrcXE, SrcYE       - source extent in pixels; both loops are driven by
*                      these
* DstXE              - destination x extent; only used to work out how far
*                      to advance lpDst at the end of a scanline pair
* SrcWidth, DstWidth - bytes per scanline of source and destination
* DstYE, x_fract     - not referenced by this function
*
* An experiment was done on x86 to only write every other line during
* the stretch and when the whole frame was done to use memcpy to fill
* in the gaps.  This is slower than doing the stretch in a single pass.
*/
void
Stretch_1_2_8Bits(LPBYTE lpSrc, LPBYTE lpDst, int SrcXE,int SrcYE, int DstXE,
				  int DstYE, int SrcWidth, int DstWidth, int x_fract)
{

	int SrcInc, DstInc;
	register int i, j;
	WORD wPix;
	DWORD dwPix4;

	/* amount to advance source by at the end of each scan */
	SrcInc = SrcWidth - SrcXE;


	/* amount to advance dest by at the end of each scan - note
	* that we write two scans at once, so advance past the next
	* scan line
	*/
	DstInc = (DstWidth * 2) - DstXE;

	/*
	* we would like to copy the pixels DWORD at a time. this means
	* being aligned. if we are currently aligned on a WORD boundary,
	* then copy one pixel to get aligned. If we are on a byte
	* boundary, we can never get aligned, so use the slower loop.
	*/
	if ( ((DWORD_PTR)lpDst) & 1) {

		/*
		* dest is at an odd address - so we can never align it
		* by writing WORDs - use slow loop.
		*/
		for (i = 0; i < SrcYE; i++) {

			for (j = 0; j < SrcXE; j++) {

				/* get a pixel and double it */

				wPix = *lpSrc++;
				wPix |= (wPix<<8);


				/* write doubled pixel to this scanline */

				*( (WORD UNALIGNED *) lpDst) = wPix;

				/* write double pixel to next scanline */
				*( (WORD UNALIGNED *) (lpDst + DstWidth)) = wPix;

				lpDst += sizeof(WORD);
			}
			lpSrc += SrcInc;
			lpDst += DstInc;
		}
		return;
	}

	/*
	* this will be the aligned version. align each scan line
	*/
	for ( i = 0; i < SrcYE; i++) {

		/* count of pixels remaining */
		j = SrcXE;

		/* align this scan line */
		if (((DWORD_PTR)lpDst) & 2) {

			/* word aligned - copy one doubled pixel and we are ok */
			wPix = *lpSrc++;
			wPix |= (wPix << 8);

			*( (WORD *) lpDst) = wPix;
			*( (WORD *) (lpDst + DstWidth)) = wPix;
			lpDst += sizeof(WORD);

			j -= 1;
		}


		/* now dest is aligned - so loop eating two pixels at a time
		* until there is at most one left
		*/
		for ( ; j > 1; j -= 2) {

			/* read two pixels and double them */
			wPix = * ((WORD UNALIGNED *) lpSrc);
			lpSrc += sizeof(WORD);

			/* low byte of wPix fills bits 0-15, high byte fills bits 16-31 */
			dwPix4 = (wPix & 0xff) | ((wPix & 0xff) << 8);
			dwPix4 |= ((wPix & 0xff00) << 8) | ((wPix & 0xff00) << 16);
			*((DWORD *) lpDst) = dwPix4;
			*((DWORD *) (lpDst + DstWidth)) = dwPix4;

			lpDst += sizeof(DWORD);
		}

		/* odd byte remaining ? */
		if (j > 0) {
			/* one source pixel left over - write it doubled to both scanlines */
			wPix = *lpSrc++;
			wPix |= (wPix << 8);

			*( (WORD *) lpDst) = wPix;
			*( (WORD *) (lpDst + DstWidth)) = wPix;
			lpDst += sizeof(WORD);

			j -= 1;
		}
		/* skip source padding; skip the second destination line of the pair */
		lpSrc += SrcInc;
		lpDst += DstInc;
	}
}



/* ----------------------------------------------------------------*/

/*
* Stretch_1_2_16Bits
*
* stretch 1:2 in both directions, for 16-bits: every source pixel is
* written as a 2x2 block, i.e. twice on the current destination scanline
* and twice on the scanline DstWidth bytes further on.
*
* SrcXE and SrcYE drive the loops; SrcWidth and DstWidth are the bytes
* per scanline. DstXE, DstYE and x_fract have no effect on what is
* written.
*/

void
Stretch_1_2_16Bits(LPBYTE lpSrc, LPBYTE lpDst, int SrcXE,int SrcYE, int DstXE,
				   int DstYE, int SrcWidth, int DstWidth, int x_fract)

{
	LPBYTE srcRow = lpSrc;	/* start of the current source scanline */
	LPBYTE dstRow = lpDst;	/* start of the current destination pair */
	int row, col;

	for (row = 0; row < SrcYE; row++) {

		LPBYTE srcPix = srcRow;
		LPBYTE dstPix = dstRow;

		for (col = 0; col < SrcXE; col++) {

			/* fetch one pixel and double it into a DWORD */
			DWORD dwPair = *((WORD *) srcPix);
			dwPair |= (dwPair << 16);

			/* same doubled pixel on this scanline and on the next one */
			*( (DWORD UNALIGNED *) dstPix) = dwPair;
			*( (DWORD UNALIGNED *) (dstPix + DstWidth)) = dwPair;

			srcPix += sizeof(WORD);
			dstPix += sizeof(DWORD);
		}

		/* one source scanline feeds two destination scanlines */
		srcRow += SrcWidth;
		dstRow += DstWidth * 2;
	}
}
